library(readr)
library(dplyr)
library(stringr)
library(ggplot2)
library(lubridate)
library(tidyr)
library(shiny)
library(plotly)
data=read_csv("./data/weekly_deaths_by_state_and_causes.csv")
general_data <- data |>
janitor::clean_names() |>
rename_with(~ str_replace_all(., " ", "_")) |>
filter(jurisdiction_of_occurrence =="United States") |>
rename_with(~ make.unique(str_replace(., "_\\w\\d.*", ""))) |>
mutate(month = month(week_ending_date)) |>
rename( covid_multiple_cause=covid,
covid_underlying_cause=covid.1,
symptoms_not_classified=symptoms_signs_and_abnormal_clinical_and_laboratory_findings_not_elsewhere_classified
)
p_data <- read.delim("./data/Population by States.txt",
header = TRUE, stringsAsFactors = FALSE) |>
janitor::clean_names()
population_summary <- p_data |>
filter(year_code >= 2020 & year_code <= 2023) |>
group_by(year_code) |>
summarise(Total_Population = sum(population, na.rm = TRUE)) |>
rename(mmwr_year=year_code)
death_trends_data = general_data |>
select(
week_ending_date,
all_cause,
natural_cause,
jurisdiction_of_occurrence,
mmwr_year,
septicemia,
malignant_neoplasms,
diabetes_mellitus,
alzheimer_disease,
influenza_and_pneumonia,
chronic_lower_respiratory_diseases,
other_diseases_of_respiratory_system,
nephritis_nephrotic_syndrome_and_nephrosis,
symptoms_not_classified,
diseases_of_heart,
cerebrovascular_diseases,
covid_multiple_cause,
covid_underlying_cause
) |>
gather(key = "cause_of_death", value = "death_count",
septicemia,
all_cause,
natural_cause,
malignant_neoplasms,
diabetes_mellitus,
alzheimer_disease,
influenza_and_pneumonia,
chronic_lower_respiratory_diseases,
other_diseases_of_respiratory_system,
nephritis_nephrotic_syndrome_and_nephrosis,
symptoms_not_classified,
diseases_of_heart,
cerebrovascular_diseases,
covid_multiple_cause,
covid_underlying_cause) |>
group_by(week_ending_date, cause_of_death) |>
summarize(total_deaths = sum(death_count, na.rm = TRUE)) |>
ungroup()
death_trends_data2 = death_trends_data |>
mutate(mmwr_year = as.numeric(format(week_ending_date, "%Y"))) |>
left_join(population_summary, by = "mmwr_year") |>
mutate(death_rate = total_deaths / Total_Population) |>
mutate(death_rate_percent = death_rate * 100)
death_trends_plot = plot_ly(
data = death_trends_data,
x = ~week_ending_date,
y = ~total_deaths,
color = ~cause_of_death,
colors = RColorBrewer::brewer.pal(12, "Set3"),
type = 'scatter',
mode = 'lines+markers',
line = list(shape = 'linear', opacity = 0.6),
marker = list(opacity = 0.6)
) |>
layout(
title = list(
text = "Mortality Trends Over Time for Different Causes of Death",
x = 0.5
),
xaxis = list(title = "Week Ending Date"),
yaxis = list(title = "Total Deaths"),
legend = list(title = list(text = "Cause of Death"),
font = list(size = 6)),
plot_bgcolor = 'rgba(240,240,240,0.9)'
)
death_trends_plot
death_trends_plot2 <- plot_ly(
data = death_trends_data2,
x = ~week_ending_date,
y = ~death_rate_percent,
color = ~cause_of_death,
colors = RColorBrewer::brewer.pal(12, "Set3"),
type = 'scatter',
mode = 'lines+markers',
line = list(shape = 'linear', opacity = 0.6),
marker = list(opacity = 0.6)
) |>
layout(
title = "Proportion of Deaths to Population Over Time by Cause of Death",
xaxis = list(title = "Week Ending Date"),
yaxis = list(title = "Deaths as Percentage of Population (%)"),
legend = list(
title = list(text = "Cause of Death"),
font = list(size = 6)
),
plot_bgcolor = 'rgba(240,240,240,0.9)'
)
death_trends_plot2
The plot reveals several significant peaks in the number and rate of all-cause and natural-cause deaths between 2020 and 2022. Interestingly, these peaks align with surges in COVID-related deaths, suggesting a strong correlation between COVID-19 and both total mortality and natural-cause mortality during this period. ## Proportion of death plot
death_proportion_data =general_data |> select(
week_ending_date,
mmwr_year,
covid_multiple_cause,
covid_underlying_cause,
septicemia,
malignant_neoplasms,
diabetes_mellitus,
alzheimer_disease,
influenza_and_pneumonia,
chronic_lower_respiratory_diseases,
other_diseases_of_respiratory_system,
nephritis_nephrotic_syndrome_and_nephrosis,
symptoms_not_classified,
diseases_of_heart,
cerebrovascular_diseases
) |>
gather(key = "cause_of_death", value = "death_count",
covid_multiple_cause,
covid_underlying_cause,
septicemia,
malignant_neoplasms,
diabetes_mellitus,
alzheimer_disease,
influenza_and_pneumonia,
chronic_lower_respiratory_diseases,
other_diseases_of_respiratory_system,
nephritis_nephrotic_syndrome_and_nephrosis,
symptoms_not_classified,
diseases_of_heart,
cerebrovascular_diseases
) |>
group_by(week_ending_date, mmwr_year, cause_of_death) |>
summarize(death_count = sum(death_count, na.rm = TRUE)) |>
group_by(week_ending_date, mmwr_year) |>
mutate(total_deaths = sum(death_count, na.rm = TRUE)) |>
mutate(proportion = death_count / total_deaths) |>
arrange(week_ending_date, cause_of_death, desc(proportion))|>
ungroup()
proportion_plot <- plot_ly(
data = death_proportion_data,
x = ~week_ending_date,
y = ~proportion,
color = ~cause_of_death,
colors = RColorBrewer::brewer.pal(12, "Set3"),
type = 'bar',
hoverinfo = 'x+y+name',
marker = list(opacity = 0.7)
) |>
layout(
title = "Changes in the Proportion of Each Cause of Death in Total Mortality Over Four Years",
xaxis = list(
title = "Week Ending Date",
tickformat = "%Y",
dtick = 31536000000
),
yaxis = list(
title = "Proportion of Total Deaths"
),
legend = list(
orientation = "v",
x = 1.2,
y=1,
xanchor = "center",
font = list(size = 5)
),
plot_bgcolor = 'rgba(240,240,240,0.9)',
margin = list(t = 50),
showlegend = TRUE,
barmode = 'stack'
)
proportion_plot
proportion_plot2 <- plot_ly(
data = death_proportion_data,
x = ~week_ending_date,
y = ~proportion,
color = ~cause_of_death,
colors = RColorBrewer::brewer.pal(12, "Set3"),
type = 'scatter',
mode = 'lines',
line = list(opacity = 0.7, width = 2),
hoverinfo = 'x+y+name'
) |>
layout(
title = "Changes in the Proportion of Each Cause of Death Over Four Years",
xaxis = list(
title = "Week Ending Date",
tickformat = "%Y",
dtick = 31536000000
),
yaxis = list(
title = "Proportion of Total Deaths",
tickformat = ".1%"
),
legend = list(
orientation = "v",
x = 1.3,
y = 1,
xanchor = "center",
font = list(size = 8)
),
plot_bgcolor = 'rgba(240,240,240,0.9)',
margin = list(t = 50),
showlegend = TRUE
)
proportion_plot2
proportion_plot3 <- plot_ly(
data = death_proportion_data,
x = ~week_ending_date,
y = ~proportion,
color = ~cause_of_death,
colors = RColorBrewer::brewer.pal(12, "Set3"),
type = 'scatter',
mode = 'lines',
fill = 'tozeroy',
line = list(opacity = 1, width = 2),
hoverinfo = 'x+y+name'
) |>
layout(
title = "Proportion of Each Cause of Death Over Time",
xaxis = list(
title = "Week Ending Date",
tickformat = "%Y",
dtick = 31536000000
),
yaxis = list(
title = "Proportion of Total Deaths",
tickformat = ".1%"
),
legend = list(
orientation = "v",
x = 1.3,
y = 1,
xanchor = "center",
font = list(size = 8)
),
plot_bgcolor = 'rgba(240,240,240,0.9)',
margin = list(t = 50),
showlegend = TRUE
)
proportion_plot3
The plots above show that among all natural causes of death, excluding COVID-19, the highest proportions are attributed to heart disease and malignant neoplasms. Notably, in 2020 and 2021, these two causes exhibited an inverse relationship with COVID-19 deaths. This phenomenon could be explained by evidence of COVID-19 infection, whether virological or clinical, in the days or weeks leading up to death in patients with heart disease or malignant neoplasms. Such deaths were likely coded as COVID-19 during certification, resulting in an apparent decrease in the reported numbers of deaths from heart disease and malignant neoplasms. ### Pie-chart
pie_data <- death_proportion_data |>
filter(!cause_of_death %in% c("all_cause", "natural_cause"))
# Define UI for Shiny app
ui <- fluidPage(
titlePanel("Death Proportions for Different Years"),
# Layout: two rows, each containing two pie charts
fluidRow(
column(6, plotlyOutput("pie_2020")),
column(6, plotlyOutput("pie_2021"))
),
fluidRow(
column(6, plotlyOutput("pie_2022")),
column(6, plotlyOutput("pie_2023"))
)
)
# Define server for Shiny app
server <- function(input, output, session) {
# Function to create a pie chart for each year
pie_chart <- function(year) {
pie_plot <- pie_data |>
filter(year == year) |>
plot_ly(
labels = ~cause_of_death,
values = ~proportion,
type = 'pie',
textinfo = 'label+percent',
showlegend = TRUE
) |>
layout(
title = paste("Proportion of Death Causes in", year),
showlegend = FALSE)
return(pie_plot)
}
# Render the pie chart for each year
output$pie_2020 = renderPlotly({ pie_chart(2020) })
output$pie_2021 = renderPlotly({ pie_chart(2021) })
output$pie_2022 = renderPlotly({ pie_chart(2022) })
output$pie_2023 = renderPlotly({ pie_chart(2023) })
}
# Run the Shiny app
shinyApp(ui = ui, server = server)
In the pie chart, it is evident that the leading causes of death are heart disease and malignant neoplasms, followed by COVID-19. #### Change of proportion plot
# death by disease
death_sum_data <- death_proportion_data |>
group_by(mmwr_year, cause_of_death) |>
summarise(total_deaths = sum(death_count, na.rm = TRUE)) |>
ungroup()
## `summarise()` has grouped output by 'mmwr_year'. You can override using the
## `.groups` argument.
death_proportion_data <- death_sum_data |>
group_by(mmwr_year) |>
mutate(total_deaths_year = sum(total_deaths, na.rm = TRUE)) |>
ungroup() |>
mutate(death_proportion = total_deaths / total_deaths_year)
# death change
death_change_data <- death_proportion_data |>
arrange(cause_of_death, mmwr_year) |>
group_by(cause_of_death) |>
mutate(
lag_proportion = lag(death_proportion),
proportion_change = death_proportion - lag_proportion
) |>
ungroup()
# top 5 in change
top_changes_data <- death_change_data |>
group_by(mmwr_year) |>
slice_max(order_by = abs(proportion_change), n = 5) |>
ungroup()
proportion_plot = plot_ly(
data = top_changes_data,
x = ~as.factor(mmwr_year),
y = ~proportion_change,
color = ~cause_of_death,
colors = RColorBrewer::brewer.pal(12, "Set3"),
type = 'bar',
hoverinfo = 'x+y+name',
marker = list(opacity = 0.7)
) %>%
layout(
title = "Top 5 Causes of Death with the Largest Change in Proportion by Year",
xaxis = list(
title = "Year",
tickmode = "linear",
dtick = 1
),
yaxis = list(
title = "Change in Proportion of Total Deaths"
),
legend = list(
orientation = "v",
x = 1.15,
y = 1,
xanchor = "center",
font = list(size = 5)
),
plot_bgcolor = 'rgba(240,240,240,0.9)',
margin = list(t = 50),
showlegend = TRUE,
barmode = 'group'
)
proportion_plot
## Warning: Ignoring 13 observations
It can be observed that for COVID-19, its proportion of total deaths increased from 2020 to 2021 and then declined for the next two consecutive years. In contrast, heart disease and malignant neoplasms showed the opposite trend, with their proportions of total deaths decreasing from 2020 to 2021 and subsequently rising over the following two years.
What can we learn from death?
Have you ever thought about which week you are most likely to die in a year? Time Trend Analysis: By examining the fluctuations in death rates across the 52 weeks of a year, we can identify peak mortality periods and try to find underlying causes (death peaks and seasonal illnesses, public health crises).
Weekly trends might look different depending on where you are, as mortality rates are influenced by both environmental and healthcare system factors.
National and Regional Level Analysis Explore how death might change across time and regions.
City-Level Analysis Narrowing it down to a city like New York